import atexit
import re

import scrapy

# Output file written to by Mj.parse_content. Opened in append mode so
# repeated crawls add to ``mj.txt`` instead of overwriting it.
file = open('mj.txt', 'a+', encoding='utf-8')
# The handle lives for the whole process, so close (and thereby flush) it at
# interpreter exit instead of relying on garbage collection to do so.
atexit.register(file.close)


class Mj(scrapy.Spider):
    """Spider that appends sentences scraped from gushiwen.cn to ``file``.

    Crawl flow, as driven by the callbacks below:

    1. ``parse`` follows every ``div.typecont > span > a`` link on the
       start page.
    2. ``parse_item`` follows the link directly under each ``div.cont``
       entry of those pages.
    3. ``parse_content`` extracts the text of each ``div.contson`` and
       writes the sentences ending in ``。`` to the module-level ``file``.
    """

    name = 'mj'
    allowed_domains = ['so.gushiwen.cn', 'www.gushiwen.cn']
    start_urls = ['https://www.gushiwen.cn/shiju/']

    def parse(self, response):
        """Yield a request for every link in the start page's type list.

        Anchors without an ``href`` are skipped; joining ``None`` would
        resolve to the page's own URL and re-request it.
        """
        for sel in response.xpath('//div[@class="typecont"]/span/a'):
            href = sel.xpath('@href').extract_first()
            if not href:
                continue
            yield scrapy.Request(response.urljoin(href), callback=self.parse_item)

    def parse_item(self, response):
        """Yield a request for the link found in each listed entry.

        Entries without a link are skipped and relative links are resolved
        against the page URL: ``scrapy.Request`` rejects both ``None`` and
        scheme-less URLs, and the resulting exception would otherwise drop
        every remaining entry on the page.
        """
        entries = response.xpath(
            '//div[@class="left"]/div[@class="sons"]/div[@class="cont"]'
        )
        for sel in entries:
            href = sel.xpath('a/@href').extract_first()
            if not href:
                continue
            yield scrapy.Request(response.urljoin(href), callback=self.parse_content)

    def parse_content(self, response):
        """Append the sentences of every ``div.sons`` block to ``file``.

        Each match is written on its own line, and each block that produced
        at least one match is followed by an empty line.
        """
        for sel in response.xpath('//div[@class="left"]/div[@class="sons"]'):
            # The pattern captures the shortest run of non-whitespace
            # characters that ends with the full stop "。".
            sentences = sel.xpath(
                'div[@class="cont"]/div[@class="contson"]//text()'
            ).re(r'\s?(\S+?。)')
            lines = [sentence + '\n' for sentence in sentences if sentence]
            if lines:
                lines.append('\n')  # blank separator line after the block
                file.writelines(lines)

    def closed(self, reason):
        """Flush buffered output when Scrapy reports the spider as closed.

        The handle is only flushed, not closed, because it is owned by the
        module and may still be used afterwards.
        """
        if not file.closed:
            file.flush()
